{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "data = pd.read_csv(\"./users_FE.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1000 entries, 0 to 999\n",
      "Data columns (total 7 columns):\n",
      "Unnamed: 0         1000 non-null int64\n",
      "LocaleId           1000 non-null float64\n",
      "BirthYearInt       1000 non-null float64\n",
      "GenderId           1000 non-null float64\n",
      "JoinedYearMonth    1000 non-null float64\n",
      "TimezoneInt        1000 non-null float64\n",
      "cluster_100        1000 non-null int64\n",
      "dtypes: float64(5), int64(2)\n",
      "memory usage: 54.8 KB\n"
     ]
    }
   ],
   "source": [
    "data = data[:1000]\n",
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data = data.drop(['cluster_100'],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.cluster import MiniBatchKMeans\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "CH_scores ={\n",
    "    'CH_score20':0,\n",
    "    'CH_score40':0,\n",
    "    'CH_score80':0\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "km = MiniBatchKMeans(20)\n",
    "km.fit(data)\n",
    "cluster_20  = km.predict(data)\n",
    "CH_scores['CH_score20'] = metrics.silhouette_score(data,cluster_20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "km = MiniBatchKMeans(40)\n",
    "km.fit(data)\n",
    "cluster_40 = km.predict(data)\n",
    "CH_scores['CH_score40'] = metrics.silhouette_score(data,cluster_40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "km = MiniBatchKMeans(80)\n",
    "km.fit(data)\n",
    "cluster_80 = km.predict(data)\n",
    "CH_scores['CH_score80'] = metrics.silhouette_score(data,cluster_80)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'CH_score20': 0.51625261685993096,\n",
       " 'CH_score40': 0.49941896293450566,\n",
       " 'CH_score80': 0.47614184261393122}"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "CH_scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data[\"cluster_20\"] = cluster_20\n",
    "data[\"cluster_40\"] = cluster_40\n",
    "data[\"cluster_80\"] = cluster_80"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data.to_csv(\"users_clusering_20_40_80.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>LocaleId</th>\n",
       "      <th>BirthYearInt</th>\n",
       "      <th>GenderId</th>\n",
       "      <th>JoinedYearMonth</th>\n",
       "      <th>TimezoneInt</th>\n",
       "      <th>cluster_20</th>\n",
       "      <th>cluster_40</th>\n",
       "      <th>cluster_80</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000036</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000018</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000016</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>0.000045</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000018</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>13</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000018</td>\n",
       "      <td>3</td>\n",
       "      <td>39</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000025</td>\n",
       "      <td>-0.000022</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000018</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000025</td>\n",
       "      <td>-0.000018</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000022</td>\n",
       "      <td>-0.000018</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>20</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>21</td>\n",
       "      <td>0.000072</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000018</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>22</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>23</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>24</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>-0.000022</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>25</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000023</td>\n",
       "      <td>-0.000018</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>26</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>-0.000022</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>27</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000018</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>28</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>29</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>-0.000036</td>\n",
       "      <td>3</td>\n",
       "      <td>33</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>970</th>\n",
       "      <td>970</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000027</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>971</th>\n",
       "      <td>971</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>972</th>\n",
       "      <td>972</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000036</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>973</th>\n",
       "      <td>973</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>974</th>\n",
       "      <td>974</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>975</th>\n",
       "      <td>975</td>\n",
       "      <td>0.000072</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000036</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>976</th>\n",
       "      <td>976</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000036</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>977</th>\n",
       "      <td>977</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>978</th>\n",
       "      <td>978</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>-0.000029</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>979</th>\n",
       "      <td>979</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980</th>\n",
       "      <td>980</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>981</th>\n",
       "      <td>981</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000018</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>982</th>\n",
       "      <td>982</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>983</th>\n",
       "      <td>983</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>984</th>\n",
       "      <td>984</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000018</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>985</th>\n",
       "      <td>985</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>986</th>\n",
       "      <td>986</td>\n",
       "      <td>0.000075</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000036</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>987</th>\n",
       "      <td>987</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>-0.000036</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>988</th>\n",
       "      <td>988</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>989</th>\n",
       "      <td>989</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000016</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>990</th>\n",
       "      <td>990</td>\n",
       "      <td>0.000072</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000027</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>991</th>\n",
       "      <td>991</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>992</th>\n",
       "      <td>992</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000018</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>993</th>\n",
       "      <td>993</td>\n",
       "      <td>0.000015</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000036</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>994</th>\n",
       "      <td>994</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000018</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>995</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000036</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>996</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000024</td>\n",
       "      <td>-0.000018</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>997</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>998</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000036</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>999</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>-0.000036</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0  LocaleId  BirthYearInt  GenderId  JoinedYearMonth  \\\n",
       "0             0  0.000020      0.000027  0.000019         0.000026   \n",
       "1             1  0.000020      0.000027  0.000019         0.000026   \n",
       "2             2  0.000028      0.000027  0.000019         0.000026   \n",
       "3             3  0.000028      0.000027  0.000038         0.000027   \n",
       "4             4  0.000020      0.000027  0.000038         0.000026   \n",
       "5             5  0.000045      0.000027  0.000038         0.000027   \n",
       "6             6  0.000020      0.000027  0.000019         0.000026   \n",
       "7             7  0.000020      0.000027  0.000038         0.000026   \n",
       "8             8  0.000020      0.000027  0.000019         0.000026   \n",
       "9             9  0.000020      0.000027  0.000038         0.000026   \n",
       "10           10  0.000028      0.000027  0.000019         0.000026   \n",
       "11           11  0.000028      0.000027  0.000038         0.000026   \n",
       "12           12  0.000020      0.000027  0.000038         0.000026   \n",
       "13           13  0.000028      0.000027  0.000038         0.000026   \n",
       "14           14  0.000000      0.000000  0.000019         0.000025   \n",
       "15           15  0.000027      0.000027  0.000019         0.000026   \n",
       "16           16  0.000000      0.000027  0.000019         0.000025   \n",
       "17           17  0.000020      0.000027  0.000019         0.000026   \n",
       "18           18  0.000020      0.000027  0.000019         0.000026   \n",
       "19           19  0.000028      0.000000  0.000019         0.000022   \n",
       "20           20  0.000020      0.000027  0.000019         0.000026   \n",
       "21           21  0.000072      0.000027  0.000038         0.000026   \n",
       "22           22  0.000028      0.000027  0.000019         0.000026   \n",
       "23           23  0.000028      0.000027  0.000038         0.000026   \n",
       "24           24  0.000020      0.000027  0.000019         0.000027   \n",
       "25           25  0.000028      0.000027  0.000019         0.000023   \n",
       "26           26  0.000020      0.000027  0.000019         0.000027   \n",
       "27           27  0.000028      0.000027  0.000038         0.000026   \n",
       "28           28  0.000028      0.000027  0.000038         0.000026   \n",
       "29           29  0.000028      0.000027  0.000019         0.000027   \n",
       "..          ...       ...           ...       ...              ...   \n",
       "970         970  0.000000      0.000027  0.000019         0.000026   \n",
       "971         971  0.000020      0.000027  0.000019         0.000027   \n",
       "972         972  0.000028      0.000027  0.000038         0.000026   \n",
       "973         973  0.000020      0.000027  0.000019         0.000027   \n",
       "974         974  0.000020      0.000027  0.000019         0.000026   \n",
       "975         975  0.000072      0.000027  0.000019         0.000027   \n",
       "976         976  0.000000      0.000027  0.000019         0.000027   \n",
       "977         977  0.000028      0.000027  0.000038         0.000026   \n",
       "978         978  0.000028      0.000027  0.000019         0.000027   \n",
       "979         979  0.000028      0.000027  0.000019         0.000027   \n",
       "980         980  0.000028      0.000027  0.000038         0.000027   \n",
       "981         981  0.000000      0.000027  0.000019         0.000026   \n",
       "982         982  0.000028      0.000027  0.000019         0.000026   \n",
       "983         983  0.000028      0.000027  0.000019         0.000026   \n",
       "984         984  0.000028      0.000027  0.000038         0.000026   \n",
       "985         985  0.000020      0.000027  0.000038         0.000026   \n",
       "986         986  0.000075      0.000027  0.000038         0.000026   \n",
       "987         987  0.000028      0.000027  0.000019         0.000027   \n",
       "988         988  0.000028      0.000027  0.000038         0.000026   \n",
       "989         989  0.000028      0.000027  0.000038         0.000027   \n",
       "990         990  0.000072      0.000027  0.000019         0.000026   \n",
       "991         991  0.000028      0.000027  0.000019         0.000026   \n",
       "992         992  0.000000      0.000027  0.000038         0.000026   \n",
       "993         993  0.000015      0.000027  0.000038         0.000027   \n",
       "994         994  0.000028      0.000027  0.000019         0.000026   \n",
       "995         995  0.000000      0.000027  0.000019         0.000026   \n",
       "996         996  0.000028      0.000027  0.000019         0.000024   \n",
       "997         997  0.000028      0.000027  0.000019         0.000027   \n",
       "998         998  0.000020      0.000027  0.000019         0.000026   \n",
       "999         999  0.000028      0.000027  0.000019         0.000027   \n",
       "\n",
       "     TimezoneInt  cluster_20  cluster_40  cluster_80  \n",
       "0       0.000036           3          39          30  \n",
       "1       0.000031           3          39          30  \n",
       "2      -0.000018           3          39          30  \n",
       "3       0.000016           3          39          30  \n",
       "4       0.000031           3          39          30  \n",
       "5       0.000018           3          39          30  \n",
       "6       0.000031           3          39          30  \n",
       "7       0.000031           3          39          30  \n",
       "8       0.000031           3          39          30  \n",
       "9       0.000031           3          39          30  \n",
       "10      0.000031           3          39          30  \n",
       "11      0.000031           3          39          42  \n",
       "12      0.000031           3          39          42  \n",
       "13      0.000018           3          39          42  \n",
       "14     -0.000022           3           3          42  \n",
       "15     -0.000018           3           3          42  \n",
       "16     -0.000018           3           3          42  \n",
       "17      0.000031           3           3          42  \n",
       "18      0.000020           3           3          42  \n",
       "19     -0.000018           3           3          42  \n",
       "20      0.000031           3           3          42  \n",
       "21      0.000018           3           3          42  \n",
       "22      0.000031           3           3           5  \n",
       "23      0.000031           3           3           5  \n",
       "24     -0.000022           3           3           5  \n",
       "25     -0.000018           3           3           5  \n",
       "26     -0.000022           3           3           5  \n",
       "27     -0.000018           3           3           5  \n",
       "28      0.000031           3           3           5  \n",
       "29     -0.000036           3          33           5  \n",
       "..           ...         ...         ...         ...  \n",
       "970    -0.000027          16          12          22  \n",
       "971     0.000031          16          12          22  \n",
       "972    -0.000036          16          12          22  \n",
       "973     0.000020          16          12          22  \n",
       "974     0.000031          16          12          22  \n",
       "975     0.000036          16          12          56  \n",
       "976     0.000036          16          12          56  \n",
       "977     0.000031          16          12          56  \n",
       "978    -0.000029          16          12          56  \n",
       "979     0.000031          16          12          56  \n",
       "980     0.000031          16          12          56  \n",
       "981    -0.000018          16          12          56  \n",
       "982     0.000031          16          12          56  \n",
       "983     0.000031          16          12          56  \n",
       "984    -0.000018          16          12          56  \n",
       "985     0.000031          16          12          56  \n",
       "986    -0.000036          16          12          56  \n",
       "987    -0.000036          16          12          56  \n",
       "988     0.000031          16          12          56  \n",
       "989     0.000016          16          12           9  \n",
       "990    -0.000027          16          12           9  \n",
       "991     0.000031          16          12           9  \n",
       "992    -0.000018          16          12           9  \n",
       "993     0.000036          16          12           9  \n",
       "994     0.000018          16          12           9  \n",
       "995     0.000036          16          12           9  \n",
       "996    -0.000018          16          12           9  \n",
       "997     0.000031          16          12           9  \n",
       "998    -0.000036          16          12           9  \n",
       "999    -0.000036          16          12           9  \n",
       "\n",
       "[1000 rows x 9 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
